# -*- coding: utf-8 -*-
import datetime
import re
import demjson
from utils.return_string import Return_String as rs
import scrapy
from guige_spider.model.ggreptileinfo import GgReptileInfo
from guige_spider.model.financialproducts import FinancialProducts
from guige_spider.model import mysqlSession


class CebAnnSpider(scrapy.Spider):
    """China Everbright Bank (cebbank.com) spider: crawl product announcements.

    The crawl is a sequential state machine driven by attributes on the
    spider, not by concurrent requests:

    * ``parse`` reads the index page and picks the product link addressed by
      ``sir_page`` (outer ``<li>``) and ``pro_page`` (inner ``<li>``).
    * ``parse_detial_pro`` follows the product page to its announcement list.
    * ``parse_detail_fxgg_list`` picks row ``curr_row`` of that list.
    * ``parse_detail_fxgg_info`` turns the announcement table into an item
      (a plain ``dict``) and then schedules the next step.

    Every step re-requests a page with ``dont_filter=True`` after advancing
    one of the cursors, so the order of the cursor updates matters.
    """

    name = 'cebbank_fxan'
    # Scrapy expects bare domain names here, not URLs.
    allowed_domains = ['cebbank.com']
    url_part_head = 'http://www.cebbank.com'
    start_urls = [
        # landing page of the cash-type products
        'http://www.cebbank.com/site/gryw/yglc/lccp38/27636260/59894835/index.html',
    ]
    total_li = 1        # number of announcement rows on the current list page
    total_sir_num = 1   # number of outer entries counted on the index page
    total_pro_num = 1   # number of inner entries counted on the index page
    curr_row = 1        # 1-based cursor into the announcement list
    gg_url = ''         # announcement list URL that row requests go back to
    sir_page = 1        # 1-based cursor over the outer index entries
    pro_page = 1        # 1-based cursor over the inner index entries
    curr_page = 1       # 1-based cursor over announcement list pages
    sir_url = ''
    pro_url = ''
    # NOTE: both statements run when the class is defined (import time).
    session = mysqlSession()
    gri = session.query(GgReptileInfo).filter_by(pcmc=name).first()

    def _index_request(self):
        """Build the request that re-enters ``parse`` on the index page."""
        return scrapy.Request(
            url=self.start_urls[0], dont_filter=True, callback=self.parse)

    def _request_after_announcement(self):
        """Advance the cursors past the current announcement row.

        With a single-row list there is no next row, so the crawl moves on
        to the next product; otherwise it moves to the next row of the same
        list. Re-requesting the list in the single-row case would loop
        forever, because that branch of ``parse_detail_fxgg_list`` does not
        look at ``curr_row``.
        """
        if self.total_li == 1:
            self.curr_row = 1
            self.pro_page = self.pro_page + 1
            return self._index_request()
        self.curr_row = self.curr_row + 1
        return scrapy.Request(
            url=self.gg_url, dont_filter=True,
            callback=self.parse_detail_fxgg_list)

    def parse(self, response):
        """Pick the next product link from the index page.

        Does nothing unless the spider's ``GgReptileInfo`` row has
        ``pczt == 0``; closes the spider when ``pczt == 1``.
        """
        if self.gri is None:
            self.logger.error(
                'no GgReptileInfo row found for spider %s', self.name)
            return
        if self.gri.pczt == 1:
            self.crawler.engine.close_spider(self, 'craw close spyder')
            return
        if self.gri.pczt != 0:
            return

        page = response.text
        sir_count = len(re.findall(r'<li class="class_A', page))
        # (original author's note: please ignore the warning on this line)
        pro_count = len(re.findall(r'<li  class="class_B', page))
        # A zero count keeps the previously known total.
        if sir_count > 0:
            self.total_sir_num = sir_count
        if pro_count > 0:
            self.total_pro_num = pro_count

        if self.sir_page > self.total_sir_num:
            self.crawler.engine.close_spider(self, '爬取结束')
            return

        if self.pro_page > self.total_pro_num:
            # Inner entries exhausted: move to the next outer entry.
            self.sir_page = self.sir_page + 1
            self.pro_page = 1
            yield self._index_request()
            return

        href = response.xpath(
            'normalize-space(//ul[@class="left_zhong"]//li[%d]//ul//li[%d]//a//@href)'
            % (self.sir_page, self.pro_page)).extract_first()
        self.pro_url = self.url_part_head + (href or '')
        if '27636330' in self.pro_url:
            self.sir_page = self.sir_page + 1
            self.pro_page = 1
            yield self._index_request()
        elif '27636369' in self.pro_url:
            self.crawler.engine.close_spider(self, '爬取结束')
        else:
            yield scrapy.Request(
                url=self.pro_url, callback=self.parse_detial_pro,
                dont_filter=True)

    def parse_detial_pro(self, response):
        """Follow a product page to its issuance-announcement "more" page."""
        # An empty href on the index page makes the request URL equal to the
        # bare site prefix; treat that as "no product here".
        if response.url == 'http://www.cebbank.com':
            self.sir_page = self.sir_page + 1
            yield self._index_request()
            return

        href = response.xpath(
            '//div[@class="grcx_r fl column"]//div[1]//div[@class="gg_tit"]//a//@href'
        ).extract_first()
        if href is None:
            self.logger.warning('============No fxgg============')
            # Keep crawling with the next product instead of ending here.
            self.pro_page = self.pro_page + 1
            yield self._index_request()
            return

        fxgg_more_url = self.url_part_head + href
        self.gg_url = fxgg_more_url
        yield scrapy.Request(
            fxgg_more_url,
            callback=self.parse_detail_fxgg_list,
            dont_filter=True
        )

    def parse_detail_fxgg_list(self, response):
        """Request the announcement at row ``curr_row`` of the list page.

        Rows that are not issuance/establishment announcements, or that
        carry no link, are skipped via ``_request_after_announcement``.
        """
        page = response.text
        item = {}
        total_li = len(re.findall(r'gg_nr fl', page))
        self.total_li = total_li

        total_page = 1
        pager_text = response.xpath(
            'normalize-space(//*[@id="zys"])').extract_first()
        try:
            total_page = int(re.findall(r'\d+', pager_text)[-1])
        except (IndexError, TypeError, ValueError):
            self.logger.warning('无页面数据')

        if total_li == 1:
            gg_exist = response.xpath(
                'normalize-space(//ul[@class="gg_right_ul"]//li[1])'
            ).extract_first()
            href = response.xpath(
                '//ul[@class="gg_right_ul"]//li[1]//a//@href'
            ).extract_first()
            if '发行公告' in gg_exist and href is not None:
                # NOTE(review): unlike the multi-row branch below, this branch
                # does not record item['sjdz'] - confirm whether it should.
                yield scrapy.Request(
                    self.url_part_head + href,
                    callback=self.parse_detail_fxgg_info,
                    meta={'item': item},
                    dont_filter=True
                )
            else:
                yield self._request_after_announcement()
        elif total_li > 1:
            if self.curr_row <= total_li:
                self.logger.debug('announcement row %s', self.curr_row)
                gg_exist = response.xpath(
                    'normalize-space(//ul[@class="gg_right_ul"]//li[%d])'
                    % self.curr_row).extract_first()
                href = response.xpath(
                    '//ul[@class="gg_right_ul"]//li[%d]//a//@href'
                    % self.curr_row).extract_first()
                wanted = ('发行公告' in gg_exist) or ('成立公告' in gg_exist)
                if wanted and href is not None:
                    gg_url = self.url_part_head + href
                    item['sjdz'] = gg_url
                    yield scrapy.Request(
                        gg_url,
                        callback=self.parse_detail_fxgg_info,
                        meta={'item': item},
                        dont_filter=True
                    )
                else:
                    yield self._request_after_announcement()
            elif total_page > 1:
                # NOTE(review): gg_url and curr_row are not updated for the
                # next list page, so its rows are addressed with the old
                # cursor - confirm the intended pagination behaviour.
                self.curr_page = self.curr_page + 1
                if self.curr_page <= total_page:
                    next_page_url = response.xpath(
                        'normalize-space(//div[@id="ceb_fy"]//a[2]//@tagname)'
                    ).extract_first()
                    yield scrapy.Request(
                        url=self.url_part_head + next_page_url,
                        dont_filter=True,
                        callback=self.parse_detail_fxgg_list)
                else:
                    self.pro_page = self.pro_page + 1
                    self.curr_page = 1
                    # Start the next product at its first row.
                    self.curr_row = 1
                    yield self._index_request()
            else:
                # Single list page (or an unusable page count): next product.
                self.curr_row = 1
                self.pro_page = self.pro_page + 1
                yield self._index_request()
        else:
            # No announcement rows at all: next product.
            self.pro_page = self.pro_page + 1
            yield self._index_request()

    def parse_detail_fxgg_info(self, response):
        """Extract one announcement into an item, then schedule the next step.

        Yields the item (when the page contains a table with a ``tbody``)
        followed by the request produced by ``_request_after_announcement``.
        """
        self.logger.debug('announcement response %s', response)
        item = response.meta['item']
        table_data = response.xpath(
            '//div[@class="xilan_con"]//table//tbody').extract_first()
        page_data = response.xpath('//div[@class="xilan_con"]').extract_first()
        total_p = len(re.findall(r'<p', page_data)) if page_data else 0

        if not table_data:
            yield self._request_after_announcement()
            return

        # Only attribute-less <tr> tags are counted, as in the page markup
        # this was written against.
        total_tr = len(re.findall(r'<tr>', table_data))
        item['sjly'] = 'cebbank'
        header_style_table = False
        for i in range(1, total_tr + 1):
            label = response.xpath(
                'normalize-space(//div[@class="xilan_con"]//table//tbody//tr[%d]//td[1])'
                % i).extract_first()
            value = response.xpath(
                'normalize-space(//div[@class="xilan_con"]//table//tbody//tr[%d]//td[2])'
                % i).extract_first()
            if self._apply_table_row(item, label, value):
                header_style_table = True
                break

        if header_style_table:
            self._apply_paragraphs(response, item, total_p)

        cplx = item.get('cplx')
        if cplx is None:
            self.logger.warning('cplx Key Error')
        elif '非保本' in cplx:
            item['bjbz'] = '不保证本金'

        yield item
        yield self._request_after_announcement()

    def _apply_table_row(self, item, label, value):
        """Copy one label/value table row into ``item``.

        Returns True when the row is the header of a column-style table
        (label '产品名称' with value '产品代码'); the caller then stops
        reading rows and falls back to the page paragraphs. The checks are
        independent ``if`` statements on purpose: several use substring
        matching, and later rows may overwrite keys set by earlier ones.
        """
        if label == '产品名称':
            if value == '产品代码':
                return True
            item['cpmc'] = value
            if '光银现金' in value:
                item['cpxl'] = '光银现金'
        if label == '产品编号':
            item['cpdm'] = value
            item['bsm'] = item['sjly'] + item['cpdm']
        if label == '理财产品登记编码':
            item['lccpdjbm'] = rs(value).lccpdjbm_tools()
        if label == '发行人/管理人':
            item['fxjg'] = value
        if label in ['受托资产管理银行', '境内托管人']:
            item['tgr'] = value
        if label == '产品类型':
            item['cplx'] = value
        if label == '超额收益分配':
            if '超出业绩比较基准' in value:
                # The benchmark row may not have been seen yet.
                if 'yqsyl' in item:
                    item['fdglfjfjc'] = item['yqsyl']
                else:
                    self.logger.warning('yqsyl not available for fdglfjfjc')
                bl = re.findall(r'\d+%', value)
                if len(bl) == 2:
                    item['fdglfbl'] = bl[-1]
        if label == '收益类型':
            item['cplx'] = value
        if label == '业绩比较基准':
            if '7天通知存款利率' in value:
                item['yqsyl'] = '中国人民银行公布的7天通知存款利率'
            elif '中国人民银行公布的1年期定期存款利率' in value:
                item['yqsyl'] = '中国人民银行公布的1年期定期存款利率'
            else:
                item['yqsyl'] = rs(value).single_percent()
        if (label == '合格投资者范围') or (label == '投资者范围'):
            item['fxdx'] = value
        if label == '预约认购期':
            mjrq = rs(value).area_data_tools()
            if len(mjrq) == 2:
                item['mjqsrq'] = mjrq[0]
                item['mjjsrq'] = mjrq[1]
        if (label == '成立日') or (label == '产品成立日'):
            item['lcjhyjclr'] = rs(value).singel_date_tools()
        if label == '封闭期':
            item['yzfs'] = '定期开放式'
        if '产品募集期' in label:
            # The date range is read from the label cell itself here.
            try:
                mjrq = rs(label).area_data_tools()
                if len(mjrq) == 2:
                    item['mjqsrq'] = mjrq[0]
                    item['mjjsrq'] = mjrq[1]
            except Exception as e:
                self.logger.error(e)
                self.logger.warning('mjrq Error')
        if label == '产品运作模式':
            item['yzfs'] = value
        if (label == '募集方式') or (label == '产品募集方式'):
            item['mjfs'] = value
        if (label == '开放申购/赎回日') or (label == '定期开放周期'):
            item['kfq'] = value
        if label in ['产品发行机构', '发行机构']:
            item['fxjg'] = value
        if label == '认购费':
            item['rgf'] = rs(value).single_percent()
        if label == '募集金额上限':
            try:
                if '万' in value:
                    gmsx = rs(value).rgqdje_tools()
                    item['fxgmsx'] = float(gmsx) / 10000
                elif '亿' in value:
                    gmsx = re.findall(r'\d+', value)[0]
                    item['fxgmsx'] = gmsx
            except (IndexError, TypeError, ValueError) as e:
                self.logger.error(e)
                self.logger.warning('============fxgmsx data error==============')
        if label == '募集金额下限':
            try:
                gmsx = rs(value).rgqdje_tools()
                item['fxgmxx'] = float(gmsx) / 10000
            except (IndexError, TypeError, ValueError) as e:
                self.logger.error(e)
                self.logger.warning('============fxgmxx data error==============')
        if (label == '认/申购追加金额') or (label == '追加认/申购最低金额'):
            try:
                item['dzje'] = re.findall(r'\d+', value)[0]
            except IndexError as e:
                self.logger.error(e)
                self.logger.warning('============ dzje data error =================')
        if label == '认购费/申购费/赎回费':
            if value == '无':
                item['sgf'] = '0'
                item['shf'] = '0'
                item['rgf'] = '0'
        if label == '产品存续期':
            if value == '长期':
                item['lccpqx'] = '长期'
                item['cpzt'] = '存续'
            elif '无固定续存续期' in value:
                item['lccpqx'] = '无固定期限'
            else:
                item['lccpqx'] = value
        if label == '开放日':
            item['kfq'] = value
        if label in ('投资及收益币种', '本金及收益币种', '募集币种'):
            item['lcbz'] = value
        if (label == '起点金额及递增金额') or (label == '起点及递增金额'):
            je_data = re.findall(r'\d+', value)
            if len(je_data) == 2:
                item['rgqdje'], item['dzje'] = je_data
        if label == '起点金额及预期年化收益率':
            item['yqsyl'] = rs(value).single_percent()
            try:
                item['rgqdje'] = rs(value).rgqdje_tools()
            except Exception as e:
                self.logger.error(e)
                self.logger.warning('rgqdje error')
        if label == '产品风险星级':
            item['fxpj'] = rs(value).fxpj_tools()
        if label == '投资周期':
            item['lccpqx'] = value
        if label == '开放申购日':
            item['kfq'] = value
        if label == '递增金额':
            item['dzje'] = value
        if label == '收益计算方式':
            item['syjsjc'] = value
        if label == '销售渠道':
            item['xsqd'] = value
        if label == '销售服务费':
            item['xsf'] = rs(value).single_percent()
        if label == '产品份额面值':
            item['dwje'] = value
        if label == '申购、赎回费':
            if value == '无':
                item['sgf'] = '0'
                item['shf'] = '0'
        if label == '认（申）购费':
            if '免认（申）购费' in value:
                item['rgf'] = '0'
                item['sgf'] = '0'
        if label == '赎回费':
            if '免赎回费' in value:
                # String '0', matching every other zero-fee assignment.
                item['shf'] = '0'
        if label == '产品到期日':
            item['lcjhyjdqr'] = rs(value).singel_date_tools()
        if '债权交易服务费' in label:
            item['xsf'] = rs(value).single_percent()
        if '质押' in label:
            # Both outcomes belong in 'cpzy'; 'cpzt' is the key used for the
            # '存续' status above.
            if '不可办理质押' in value:
                item['cpzy'] = '不可质押'
            elif '可办理质押' in value:
                item['cpzy'] = '可以质押'
        if (label == '管理费（年化）') or (label == '管理费'):
            try:
                item['gdglf'] = rs(value).single_percent()
            except IndexError as e:
                self.logger.error(e)
                self.logger.warning('=========gdglf data error=========')
        if '募集规模' in label:
            item['mjgm'] = value
        if (label == '托管费') or (label == '托管费（年化）'):
            try:
                item['tgf'] = rs(value).single_percent()
            except IndexError as e:
                self.logger.error(e)
                self.logger.warning('=========tgf data error=========')
        if label == '提前终止':
            if '光大银行有权' in value:
                item['tqzz'] = '光大银行有权终止'
            if '不可抗力因素' in value:
                item['tqzz'] = '如触发终止条款，光大有权终止'
        if label == '本金返还方式':
            if '本金等额分期偿还' in value:
                item['bjbz'] = '本金等额分期偿还'
        return False

    def _apply_paragraphs(self, response, item, total_p):
        """Fill ``item`` from the ``<p>`` paragraphs of a column-style page."""
        for j in range(1, total_p + 1):
            p_content = response.xpath(
                'normalize-space(//div[@class="xilan_con"]//p[%d])'
                % j).extract_first()
            if '最长持有时间为365天' in p_content:
                item['lccpqx'] = '365'
            # The membership test guarantees split(...)[1] exists.
            if '产品递增金额：' in p_content:
                item['dzje'] = p_content.split('产品递增金额：')[1].split('\n')[0]
            if '销售渠道：' in p_content:
                item['xsqd'] = p_content.split('销售渠道：')[1].split('\n')[0]
            if '每日开放' in p_content:
                item['kfq'] = '每日开放'

